#############
#############
#############
## Stefan Müller and Sven-Oliver Proksch: 
## Nostalgia in European Party Politics:
## A Text-Based Measurement Approach
## British Journal of Political Science
##
## Script returns all tables and plots 
## reported in SI Section D
## Table A8, Table A9
#############
#############
#############

library(dplyr)  # CRAN v1.1.2 
library(tidyr)  # CRAN v1.3.0
library(readr)  # CRAN v2.1.4 
library(texreg) # CRAN v1.38.6 
library(xtable) # CRAN v1.8-4

# If the code does not run, one or more packages may have been 
# updated, which may result in errors or conflicts. You can solve this issue
# by installing the package version listed above or by using the 
# groundhog package:
# after installing groundhog using install.packages("groundhog")
# change library(name_of_package) to
# groundhog::groundhog.library(name_of_package, date = "2023-09-04")
# Instead of adjusting the library() function for each package, 
# you can adjust them all at once using
# the following syntax:
# groundhog.library("
#                   library('pkgA')
#                   library('pkgB')
#                   library('pkgC')", date = "2023-09-04")
# More details are available at: https://groundhogr.com/using/

# Read the two inputs: the manifesto-level dataset and the sentence-level
# file with hand-coded and classified sentences (from 50 manifestos)
dat_manifestolevel <- readRDS("data_nostalgia_manifestolevel.rds")
dat_joined <- read_csv("data_coded_gpt-3.5_clean.csv")


# Keep only the manifesto-level columns used further down. The identifier
# is carried forward under the name manifesto_id_new; the original
# manifesto_id column is not retained.
dat_manifestolevel_sents <- select(
    dat_manifestolevel,
    manifesto_id_new = manifesto_id,
    n_sentences_manifesto,
    edate,
    nost_sents_sum_base,
    nost_sents_sum_emb_pos,
    nost_sents_sum_pos,
    partyname,
    countryname,
    party_family_recoded,
    loglibcons
)


# Derive a binary GPT indicator from the GPT rating: the score is coerced
# to integer, and a sentence is flagged as nostalgic (1) when that score
# is at least 5, otherwise 0
dat_joined <- mutate(
    dat_joined,
    score_gpt = as.integer(score_gpt),
    nostalgia_sentence_gpt = ifelse(score_gpt >= 5L, 1, 0)
)


# Aggregate the sentence-level data to the manifesto level: count the
# sentences flagged as nostalgic by each approach and express every count
# per 1,000 sentences of the manifesto
dat_sum <- dat_joined |>
    # no `by` is supplied, so the join keys are all column names shared by
    # the two data frames
    left_join(dat_manifestolevel_sents) |>
    # give the manifesto-level counts the same sum_* prefix as the
    # counts computed in summarise() below
    rename(
        sum_nostalgia_base = nost_sents_sum_base,
        sum_nostalgia_base_pos = nost_sents_sum_pos,
        sum_nostalgia_emb_pos = nost_sents_sum_emb_pos
    ) |>
    # manifesto-level variables are grouping variables so that they are
    # kept in the summarised output
    group_by(
        manifesto_id_new, partyname, countryname, edate,
        sum_nostalgia_base, sum_nostalgia_base_pos, sum_nostalgia_emb_pos,
        party_family_recoded, loglibcons, n_sentences_manifesto
    ) |>
    summarise(
        sum_nostalgia_coded_at_least_one = sum(nostalgia_coded_at_least_one),
        sum_nostalgia_coded_both = sum(nostalgia_coded_both),
        sum_nostalgia_bert = sum(nostalgia_sentence_bert),
        sum_nostalgia_gpt = sum(nostalgia_sentence_gpt),
        sum_nostalgia_svm = sum(nostalgia_sentence_svm),
        sum_nostalgia_emb = sum(nostalgia_sentence_dummy_emb)
    ) |>
    # rates: nostalgic sentences per 1,000 manifesto sentences
    mutate(
        nostalgia_coded_at_least_one =
            sum_nostalgia_coded_at_least_one / n_sentences_manifesto * 1000,
        nostalgia_coded_both =
            sum_nostalgia_coded_both / n_sentences_manifesto * 1000,
        nostalgia_gpt = sum_nostalgia_gpt / n_sentences_manifesto * 1000,
        nostalgia_bert = sum_nostalgia_bert / n_sentences_manifesto * 1000,
        nostalgia_emb = sum_nostalgia_emb / n_sentences_manifesto * 1000,
        nostalgia_svm = sum_nostalgia_svm / n_sentences_manifesto * 1000,
        nostalgia_base = sum_nostalgia_base / n_sentences_manifesto * 1000,
        nostalgia_base_pos =
            sum_nostalgia_base_pos / n_sentences_manifesto * 1000,
        nostalgia_emb_pos =
            sum_nostalgia_emb_pos / n_sentences_manifesto * 1000
    ) |>
    ungroup() |>
    # keep the rates, the raw counts and the manifesto metadata
    select(
        starts_with("nostalgia_"),
        starts_with("sum_"),
        party_family_recoded,
        loglibcons,
        edate,
        partyname,
        countryname
    )


# Build the manifesto overview that is exported as Table A8: one row per
# row of dat_sum, sorted from the highest to the lowest DistilBERT rate.
# (The earlier recoding of party_family_recoded was removed: that column
# is not part of the table, so the step had no effect on the result.)
dat_table_50man <- dat_sum |>
    mutate(
        # first four characters of edate -- assumes edate starts with the
        # four-digit year (TODO confirm against the data)
        Year = substr(edate, 1, 4),
        # shorten long country names; this only affects the table copy,
        # dat_sum itself keeps the full names
        countryname = dplyr::recode(countryname,
                                    "United Kingdom" = "UK",
                                    "Czech Republic" = "Czech Rep.")
    ) |>
    select(Party = partyname,
           Year,
           Country = countryname,
           `Coding (>=1)` = nostalgia_coded_at_least_one,
           `Coding (=2)` = nostalgia_coded_both,
           `Nostalgia DistilBERT` = nostalgia_bert,
           `Cultural Conservatism` = loglibcons) |>
    arrange(desc(`Nostalgia DistilBERT`))


# Table A08 ----
# Column specifications passed to xtable: eight entries for the seven
# table columns (xtable takes one extra leading entry for the row names,
# which are not printed here)
tab_a08_align <- c(
    "p{0.03\\textwidth}",
    "p{0.49\\textwidth}",
    "p{0.07\\textwidth}",
    "p{0.1\\textwidth}",
    "p{0.05\\textwidth}",
    "p{0.05\\textwidth}",
    "p{0.1\\textwidth}",
    "p{0.08\\textwidth}"
)

# LaTeX table object with caption, label and one decimal place
tab_a08 <- xtable(
    dat_table_50man,
    digits = 1,
    caption = "Overview of hand-coded nostalgia and DistilBERT classification for 50 randomly sampled manifestos, stratified by cultural conservatism",
    label = "tab:50_manifestos",
    align = tab_a08_align
)

# Write the table to tab_a08.tex (caption above the table, no row names)
print(
    tab_a08,
    type = "latex",
    digits = 1,
    size = "scriptsize",
    file = "tab_a08.tex",
    include.rownames = FALSE,
    caption.placement = "top"
)



# Standardize a numeric vector for the regression models: subtract the
# mean and divide by the standard deviation (z-scores). Missing values
# are ignored when computing mean and sd and stay missing in the result.
rescale_var <- function(x) {
    centre <- mean(x, na.rm = TRUE)
    spread <- sd(x, na.rm = TRUE)
    (x - centre) / spread
}


# Country groups used to assign each manifesto to a European region
countries_eastern <- c("Bulgaria", "Czech Republic", "Estonia", "Hungary",
                       "Latvia", "Lithuania", "Poland", "Slovenia")

countries_southern <- c("Greece", "Italy", "Spain", "Portugal")

countries_nordic <- c("Finland", "Iceland", "Norway", "Sweden", "Denmark")

countries_western <- c("Austria", "Belgium", "France", "Germany",
                       "Ireland", "Netherlands", "United Kingdom")


# Add the z-standardized outcome variables and the region variable
dat_sum <- mutate(
    dat_sum,
    # standardized versions of the nostalgia rates (see rescale_var())
    nostalgia_at_least_one_stand = rescale_var(nostalgia_coded_at_least_one),
    nostalgia_both_stand = rescale_var(nostalgia_coded_both),
    nostalgia_bert_stand = rescale_var(nostalgia_bert),
    nostalgia_gpt_stand = rescale_var(nostalgia_gpt),
    nostalgia_base_stand = rescale_var(nostalgia_base),
    # region by country; a country that appears in none of the four
    # vectors gets NA because case_when() has no fallback branch
    region = case_when(
        countryname %in% countries_nordic ~ "Northern Europe",
        countryname %in% countries_eastern ~ "Central and Eastern Europe",
        countryname %in% countries_western ~ "Western Europe",
        countryname %in% countries_southern ~ "Southern Europe"
    )
)


# Run regression models on the set of 50 manifestos: each standardized
# nostalgia measure is regressed on cultural conservatism and region
lm_at_least_one <- lm(nostalgia_at_least_one_stand ~ loglibcons + region, data = dat_sum)
lm_both <- lm(nostalgia_both_stand ~ loglibcons + region, data = dat_sum)
lm_bert <- lm(nostalgia_bert_stand ~ loglibcons + region, data = dat_sum)
lm_base <- lm(nostalgia_base_stand ~ loglibcons + region, data = dat_sum)
lm_gpt <- lm(nostalgia_gpt_stand ~ loglibcons + region, data = dat_sum)


# Models and their labels in the order used in every table below (M1-M5);
# defined once so the console output and the LaTeX table cannot diverge
models_humancoded <- list(lm_both, lm_at_least_one, lm_bert, lm_base, lm_gpt)

model_names_humancoded <- c("M1 (Coding: =2)",
                            "M2 (Coding: >=1)",
                            "M3 (DistilBERT)",
                            "M4 (Dictionary)",
                            "M5 (GPT 3.5)")


# Console output with the raw coefficient names; useful for checking that
# the hand-written coefficient labels below are in the matching order
screenreg(models_humancoded)

# Console output with readable labels. The reference category of region is
# "Central and Eastern Europe" (abbreviated CEE), the alphabetically first
# of the four region values.
screenreg(models_humancoded,
          custom.coef.names = c("(Intercept)",
                                "Cultural Conservatism",
                                "Region: Northern Europe (ref.: CEE)",
                                "Region: Southern Europe",
                                "Region: Western Europe"),
          custom.model.names = model_names_humancoded)


# Table A09 ----
# Same models written to tab_a09.tex, with shortened region labels
texreg(models_humancoded,
       custom.coef.names = c("(Intercept)",
                             "Cultural Conservatism",
                             "Region: Northern E. (ref.: CEE)",
                             "Region: Southern E.",
                             "Region: Western E."),
       custom.model.names = model_names_humancoded,
       fontsize = "scriptsize",
       caption.above = TRUE,
       caption = "Predicting nostalgic rhetoric (based on human codings) in 50 party manifestos. Dependent variable measures sentences coded as nostalgic by  both coders (Model 1), at least one of two coders (Model 2), the DistilBERT approach (Model 3), the Base Dictionary (Model 4), and GPT 3.5 (Model 5) for the same set of sentences.  Dependent variables are measured as the number of nostalgic sentences per 1,000 sentences, and z-transformed for comparability. Standard errors in parentheses.",
       label = "tab:reg_humancoded",
       file = "tab_a09.tex")

